In [7]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import plotly.express as px
In [8]:
df=pd.read_csv("C:\\Users\\djo16\\Cognifyz\\Dataset .csv")
print(df.head())
   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM Megamall, Ortigas, Mandaluyong City   
4      SM Megamall, Ortigas, Mandaluyong City   

                                    Locality Verbose   Longitude   Latitude  \
0  Century City Mall, Poblacion, Makati City, Mak...  121.027535  14.565443   
1  Little Tokyo, Legaspi Village, Makati City, Ma...  121.014101  14.553708   
2  Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...  121.056831  14.581404   
3  SM Megamall, Ortigas, Mandaluyong City, Mandal...  121.056475  14.585318   
4  SM Megamall, Ortigas, Mandaluyong City, Mandal...  121.057508  14.584450   

                           Cuisines  ...          Currency Has Table booking  \
0        French, Japanese, Desserts  ...  Botswana Pula(P)               Yes   
1                          Japanese  ...  Botswana Pula(P)               Yes   
2  Seafood, Asian, Filipino, Indian  ...  Botswana Pula(P)               Yes   
3                   Japanese, Sushi  ...  Botswana Pula(P)                No   
4                  Japanese, Korean  ...  Botswana Pula(P)               Yes   

  Has Online delivery Is delivering now Switch to order menu Price range  \
0                  No                No                   No           3   
1                  No                No                   No           3   
2                  No                No                   No           4   
3                  No                No                   No           4   
4                  No                No                   No           4   

   Aggregate rating  Rating color Rating text Votes  
0               4.8    Dark Green   Excellent   314  
1               4.5    Dark Green   Excellent   591  
2               4.4         Green   Very Good   270  
3               4.9    Dark Green   Excellent   365  
4               4.8    Dark Green   Excellent   229  

[5 rows x 21 columns]
In [9]:
df.tail()
Out[9]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines ... Currency Has Table booking Has Online delivery Is delivering now Switch to order menu Price range Aggregate rating Rating color Rating text Votes
9546 5915730 Naml۱ Gurme 208 ��stanbul Kemanke�� Karamustafa Pa��a Mahallesi, R۱ht۱m ... Karak�_y Karak�_y, ��stanbul 28.977392 41.022793 Turkish ... Turkish Lira(TL) No No No No 3 4.1 Green Very Good 788
9547 5908749 Ceviz A��ac۱ 208 ��stanbul Ko��uyolu Mahallesi, Muhittin ��st�_nda�� Cadd... Ko��uyolu Ko��uyolu, ��stanbul 29.041297 41.009847 World Cuisine, Patisserie, Cafe ... Turkish Lira(TL) No No No No 3 4.2 Green Very Good 1034
9548 5915807 Huqqa 208 ��stanbul Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... Kuru�_e��me Kuru�_e��me, ��stanbul 29.034640 41.055817 Italian, World Cuisine ... Turkish Lira(TL) No No No No 4 3.7 Yellow Good 661
9549 5916112 A���k Kahve 208 ��stanbul Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... Kuru�_e��me Kuru�_e��me, ��stanbul 29.036019 41.057979 Restaurant Cafe ... Turkish Lira(TL) No No No No 4 4.0 Green Very Good 901
9550 5927402 Walter's Coffee Roastery 208 ��stanbul Cafea��a Mahallesi, Bademalt۱ Sokak, No 21/B, ... Moda Moda, ��stanbul 29.026016 40.984776 Cafe ... Turkish Lira(TL) No No No No 2 4.0 Green Very Good 591

5 rows × 21 columns

In [10]:
df.shape
Out[10]:
(9551, 21)
In [11]:
df.isnull()
Out[11]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines ... Currency Has Table booking Has Online delivery Is delivering now Switch to order menu Price range Aggregate rating Rating color Rating text Votes
0 False False False False False False False False False False ... False False False False False False False False False False
1 False False False False False False False False False False ... False False False False False False False False False False
2 False False False False False False False False False False ... False False False False False False False False False False
3 False False False False False False False False False False ... False False False False False False False False False False
4 False False False False False False False False False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9546 False False False False False False False False False False ... False False False False False False False False False False
9547 False False False False False False False False False False ... False False False False False False False False False False
9548 False False False False False False False False False False ... False False False False False False False False False False
9549 False False False False False False False False False False ... False False False False False False False False False False
9550 False False False False False False False False False False ... False False False False False False False False False False

9551 rows × 21 columns

In [12]:
columns=df.columns.tolist()
print(columns)
['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes']
In [13]:
df['Cuisines'] = df['Cuisines'].fillna('Unknown')
print(df['Cuisines'].value_counts())
Cuisines
North Indian                                             936
North Indian, Chinese                                    511
Chinese                                                  354
Fast Food                                                354
North Indian, Mughlai                                    334
                                                        ... 
Bengali, Fast Food                                         1
North Indian, Rajasthani, Asian                            1
Chinese, Thai, Malaysian, Indonesian                       1
Bakery, Desserts, North Indian, Bengali, South Indian      1
Italian, World Cuisine                                     1
Name: count, Length: 1826, dtype: int64
In [14]:
df.isnull().sum()
Out[14]:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                0
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

LEVEL - 1¶

Task 1: Top Cuisines¶

  • Determine the top three most common cuisines in the dataset.
In [18]:
cuisine_counts = df['Cuisines'].value_counts()
cuisine_counts
Out[18]:
Cuisines
North Indian                                             936
North Indian, Chinese                                    511
Chinese                                                  354
Fast Food                                                354
North Indian, Mughlai                                    334
                                                        ... 
Bengali, Fast Food                                         1
North Indian, Rajasthani, Asian                            1
Chinese, Thai, Malaysian, Indonesian                       1
Bakery, Desserts, North Indian, Bengali, South Indian      1
Italian, World Cuisine                                     1
Name: count, Length: 1826, dtype: int64
In [20]:
cuisine_counts.head(5)
Out[20]:
Cuisines
North Indian             936
North Indian, Chinese    511
Chinese                  354
Fast Food                354
North Indian, Mughlai    334
Name: count, dtype: int64
In [26]:
print(df['Cuisines'].unique())
['French, Japanese, Desserts' 'Japanese'
 'Seafood, Asian, Filipino, Indian' ... 'Burger, Izgara'
 'World Cuisine, Patisserie, Cafe' 'Italian, World Cuisine']
In [32]:
top3=cuisine_counts.head(3)
top3
Out[32]:
Cuisines
North Indian             936
North Indian, Chinese    511
Chinese                  354
Name: count, dtype: int64
In [34]:
fig = px.bar(
    x=top3.index,      
    y=top3.values,     
    title='Top Three Cuisines',
    labels={'x': 'Cuisines', 'y': 'Number of Restaurants'},    
    color=top3.values, 
    color_continuous_scale=px.colors.sequential.Plasma        
)
fig.show()
  • Calculate the percentage of restaurants that serve each of the top cuisines.
In [36]:
total_r= len(df)
total_r
Out[36]:
9551
In [37]:
for  Cuisines, i  in top3.items():
    percentage= round( (i / total_r * 100), 2)
    print(f"{Cuisines}: {percentage}%")
    
North Indian: 9.8%
North Indian, Chinese: 5.35%
Chinese: 3.71%
In [38]:
results = pd.DataFrame({
    'Cuisine Type': list(top3.keys()),
    'Number of Restaurants': list(top3.values),
    'Percentage': percentage
})
labels = results['Cuisine Type']
sizes = results['Percentage']

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, wedgeprops={'width':0.4})
ax.axis('equal')
plt.title('Top 3 Cuisines by % of Restaurants')
plt.show()
No description has been provided for this image

Task 2 : City Analysis¶

Identify the city with the highest numberof restaurants in the dataset.

In [41]:
city_counts= df['City'].value_counts()
print(city_counts)
city_counts.index[0]
City
New Delhi           5473
Gurgaon             1118
Noida               1080
Faridabad            251
Ghaziabad             25
                    ... 
Panchkula              1
Mc Millan              1
Mayfield               1
Macedon                1
Vineland Station       1
Name: count, Length: 141, dtype: int64
Out[41]:
'New Delhi'

Calculate the average rating for restaurants in each city.

In [48]:
avg= df.groupby('City')['Aggregate rating'].mean().reset_index()
print("*--------------Average Ratings city wise-----------*")
print(avg.round(2))
*--------------Average Ratings city wise-----------*
                City  Aggregate rating
0          Abu Dhabi              4.30
1               Agra              3.96
2          Ahmedabad              4.16
3             Albany              3.56
4          Allahabad              3.40
..               ...               ...
136          Weirton              3.90
137  Wellington City              4.25
138   Winchester Bay              3.20
139          Yorkton              3.30
140        ��stanbul              4.29

[141 rows x 2 columns]

Determine the city with the highest average rating.

In [51]:
top_city = avg.sort_values(by='Aggregate rating', ascending=False).reset_index()
top_rated_city= avg.max().reset_index()
print(f"Top City: {top_city}, Rating: {top_rated_city}")
Top City:      index              City  Aggregate rating
0       56        Inner City          4.900000
1      107       Quezon City          4.800000
2       73       Makati City          4.650000
3       95        Pasig City          4.633333
4       75  Mandaluyong City          4.625000
..     ...               ...               ...
136     88         New Delhi          2.438845
137     83         Montville          2.400000
138     78         Mc Millan          2.400000
139     89             Noida          2.036204
140     43         Faridabad          1.866932

[141 rows x 3 columns], Rating:               index          0
0              City  ��stanbul
1  Aggregate rating        4.9
In [53]:
print(type(top_rated_city))
<class 'pandas.core.frame.DataFrame'>
In [55]:
print("Top City:", top_city.iloc[0]['City'])
print("Top Rating:", top_city.iloc[0]['Aggregate rating'])
Top City: Inner City
Top Rating: 4.9
In [ ]:
 

Task 3: Price Range Distribution¶

Create a histogram or bar chart tovisualize the distribution of price rangesamong the restaurants.

In [61]:
print(df['Price range'].unique())
[3 4 2 1]
In [65]:
price_counts = df['Price range'].value_counts().sort_index()
print(price_counts)
Price range
1    4444
2    3113
3    1408
4     586
Name: count, dtype: int64
In [81]:
price_percent = round((price_counts/ price_counts.sum()) * 100, 2)

price_data = pd.DataFrame({
    'Price range': price_counts.index,
    'Number of Restaurants': price_counts.values,
    'Percentage': price_percent.values
})

print(price_data)
   Price range  Number of Restaurants  Percentage
0            1                   4444       46.53
1            2                   3113       32.59
2            3                   1408       14.74
3            4                    586        6.14
In [83]:
fig = px.bar(
    price_data,
    x='Price range',
    y='Number of Restaurants',
    title='Distribution of Restaurants by Price range',
    color_discrete_sequence=['skyblue']
)

fig.update_layout(
    xaxis_title='Price range',
    yaxis_title='Number of Restaurants',
    plot_bgcolor='white',
    yaxis=dict(showgrid=True, gridcolor='lightgray'),
)

fig.show()
In [ ]:
 

Calculate the percentage of restaurantsin each price range category.

In [154]:
fig = px.pie(price_data, names='Price range', values='Percentage', title='Percentage of Restaurants by Price Range')
fig.show()
In [ ]:
 

Task 4: Online Delivery¶

Determine the percentage of restaurants that offer online delivery.

In [91]:
print(df['Has Online delivery'].unique())
['No' 'Yes']
In [95]:
delivery_counts = df['Has Online delivery'].value_counts()
In [99]:
length=len(df[df['Has Online delivery']=='Yes'])
In [107]:
total=len(df)
total
Out[107]:
9551
In [146]:
delivery_percentage = round((length / total) * 100, 2)
In [148]:
print(delivery_percentage)
25.66

Compare the average ratings of restaurants with and without online delivery.

In [140]:
avg_ratings = df.groupby('Has Online delivery')['Aggregate rating'].mean()
In [142]:
print(avg_ratings)
Has Online delivery
No     2.465296
Yes    3.248837
Name: Aggregate rating, dtype: float64
In [131]:
plt.figure(figsize=(6, 4))
avg_ratings.plot(kind='bar', color=['coral', 'lightblue'])
plt.title('Average Ratings: With vs Without Online Delivery')
plt.ylabel('Average Rating')
plt.xticks(rotation=0)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
No description has been provided for this image
In [ ]: